package com.bdqn.spark.chapter05.value

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object Spark07_RDD_Operator_Transform_Test {

  /**
   * Requirement: from the server log `input/apache.log`, extract the request
   * paths of all requests made on 17 May 2015 (date field format: 17/05/2015)
   * and print them to stdout.
   *
   * Expected whitespace-separated line layout (positions used below):
   *   field 3 = request date/time, field 6 = request path.
   */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf()
      .setMaster("local[*]")
      // was "operator-groupby" — copy-paste leftover; this example uses filter
      .setAppName("operator-filter")
    val sc = new SparkContext(sparkConf)

    val lineRDD: RDD[String] = sc.textFile("input/apache.log")

    lineRDD
      // split each line once and reuse the fields (the original split twice)
      .map(_.split(" "))
      // lift(3) guards against blank/short lines instead of throwing
      // ArrayIndexOutOfBoundsException
      .filter(fields => fields.lift(3).exists(_.startsWith("17/05/2015")))
      // lift(6) likewise skips matching lines that lack a path field
      .flatMap(fields => fields.lift(6))
      .collect()
      .foreach(println)

    sc.stop()
  }
}
